I downloaded from the Biomercator output the QTLs and meta-QTLs intervals in base pairs
I used the command bedextraxt from bedops to extract the genes in the QTLs and meta-QTLs intervals
The .bed reference file of the grapevine genome used to extract the genes is the V1 annotation mapped on the 12X.2 structural annotation downloaded from https://urgi.versailles.inra.fr/Species/Vitis/Genome-Browser . I downloaded each chromosome gff file and pasted into unique file and modified to format it like .bed file
Then I executed the following commands to sort bed files and extract genes in the intervals
for d in ./*/ ; do (cd "$d" && sed -i 's/^\<[0-9]*\>/chr&/g' *_metaqtls ); done
for d in ./*/ ; do (cd "$d" && for f in *_metaqtls; do ~/PROGRAMS/bedops/bin/sort-bed $f > $f.sorted; done ); done
for d in ./*/ ; do (cd "$d" && split -l 1 *.sorted --additional-suffix=.ok ); done
for d in ./*/ ; do (cd "$d" && mkdir ready_for_bedextract ); done
for d in ./*/ ; do (cd "$d" && mv *.ok ready_for_bedextract ); done
for d in ./*/ ; do (cd "$d/ready_for_bedextract/" && for f in *.ok; do mv $f `cut -f 4 $f` ; done ); done
for d in ./*/ ; do (cd "$d/ready_for_bedextract/" && for f in *; do ~/PROGRAMS/bedops/bin/bedextract ../../../copy/V2.1_ok.bed $f > $f.bed ; done ); done
Then I loaded in R all the list of genes comprised in the meta-QTLs intervals
options(knitr.kable.NA = '')
setwd("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/mqtl_and_qtl_genes_bedops/metaqtl/")
filenames <- list.files(path = ".", pattern = ".bed")
kable(filenames, align="l")
| flo_LG1_vey_1.bed |
| flo_LG1_vey_2.bed |
| flo_LG14_vey_1.bed |
| flo_LG14_vey_2.bed |
| flo_LG14_vey_3.bed |
| flo_LG2_vey_1.bed |
| flo_LG7_vey_1.bed |
| pheno_LG1_vey_1.bed |
| pheno_LG1_vey_2.bed |
| pheno_LG14_vey_2.bed |
| pheno_LG16_vey_1.bed |
| pheno_LG17_vey_1.bed |
| pheno_LG18_ripe+F-V_1.bed |
| pheno_LG2_vey_1.bed |
| pheno_LG2_vey_2.bed |
| pheno_LG2_vey_3.bed |
| pheno_LG2_vey_ver+Vr-Rp_3.bed |
| pheno_LG6_vey_1.bed |
| ripe_LG13_vey_bis_1.bed |
| ripe_LG13_vey_bis_2.bed |
| ripe_LG18_vey_1.bed |
| ripe_LG2_vey_1.bed |
| ripe_LG2_vey_2.bed |
| ripe_LG2_vey_3.bed |
| ripe_LG3_vey_1.bed |
| ver_LG1_vey_1.bed |
| ver_LG2_vey_1.bed |
| ver_LG2_vey_2.bed |
| ver_LG2_vey_3.bed |
all_metaqtl <- lapply(filenames, read.table, header = F, sep="\t", col.names = c("metaQTL","chr","start","end","gene")) ## load all the file in one command
names(all_metaqtl) <- filenames
names(all_metaqtl) <- gsub(pattern=".bed", replacement="", x=names(all_metaqtl)) ## remove .bed suffix
all_mqtl_merge <- Reduce(function(...) merge(...,by=c("gene","chr","start","end"), all=T), all_metaqtl) ## merge the list of file in one big dataframe
colnames(all_mqtl_merge) <- c("gene","chr","start","end", names(all_metaqtl))
kable(all_mqtl_merge[1:6,1:18], align="l")
| gene | chr | start | end | flo_LG1_vey_1 | flo_LG1_vey_2 | flo_LG14_vey_1 | flo_LG14_vey_2 | flo_LG14_vey_3 | flo_LG2_vey_1 | flo_LG7_vey_1 | pheno_LG1_vey_1 | pheno_LG1_vey_2 | pheno_LG14_vey_2 | pheno_LG16_vey_1 | pheno_LG17_vey_1 | pheno_LG18_ripe+F-V_1 | pheno_LG2_vey_1 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VIT_01s0011g00620 | chr1 | 563253 | 566683 | flo_LG1_vey_1 | |||||||||||||
| VIT_01s0011g00630 | chr1 | 572047 | 573571 | flo_LG1_vey_1 | |||||||||||||
| VIT_01s0011g00640 | chr1 | 573650 | 579327 | flo_LG1_vey_1 | |||||||||||||
| VIT_01s0011g00650 | chr1 | 586438 | 589926 | flo_LG1_vey_1 | |||||||||||||
| VIT_01s0011g00660 | chr1 | 590563 | 592859 | flo_LG1_vey_1 | |||||||||||||
| VIT_01s0011g00670 | chr1 | 593582 | 596330 | flo_LG1_vey_1 |
The QTL genes loaded were only the ones from QTLs related to phenology, identified from the pattern option in list.files command
options(knitr.kable.NA = '')
setwd("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/mqtl_and_qtl_genes_bedops/qtl/V1_on_12x.2")
filenames <- list.files(path = ".", pattern = ("VT|F-V|VB|VE|Vr|VP|V-R|Vr-Rp|F-R"))
kable(length(filenames), align="l")
| 56 |
all_qtl <- lapply(filenames, read.table, header = F, sep="\t", col.names = c("QTL","chr","start","end","gene"))
names(all_qtl) <- filenames
names(all_qtl) <- gsub(pattern=".bed", replacement="", x=names(all_qtl)) ## remove .bed suffix
all_qtl_merge <- Reduce(function(...) merge(...,by=c("gene","chr","start","end"), all=T), all_qtl)
colnames(all_qtl_merge) <- c("gene","chr","start","end", names(all_qtl))
kable(all_qtl_merge[1:6,1:18], align="l")
| gene | chr | start | end | BayoCanha_2008_Vr_1 | BayoCanha_2009_Vr_2 | BayoCanha_2010_Vr-Rp_1 | Carreno_2005_VT_3 | Carreno_2007_VT_1 | Carreno_2008_VT_2 | Costantini_2003_F-R_1 | Costantini_2003_F-V_1 | Costantini_2003_F-V_2 | Costantini_2003_F-V_3 | Costantini_2003_V-R_1 | Costantini_2003_V-R_2 | Costantini_2003_VP_1 | Costantini_2003_VT_1 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VIT_00s0229g00010 | chr2 | 5346706 | 5349092 | BayoCanha_2008_Vr_1 | BayoCanha_2009_Vr_2 | BayoCanha_2010_Vr-Rp_1 | Costantini_2003_V-R_1 | ||||||||||
| VIT_00s0229g00020 | chr2 | 5357779 | 5376350 | BayoCanha_2008_Vr_1 | BayoCanha_2009_Vr_2 | BayoCanha_2010_Vr-Rp_1 | Costantini_2003_V-R_1 | ||||||||||
| VIT_00s0229g00030 | chr2 | 5376351 | 5386049 | BayoCanha_2008_Vr_1 | BayoCanha_2009_Vr_2 | BayoCanha_2010_Vr-Rp_1 | Costantini_2003_V-R_1 | ||||||||||
| VIT_00s0229g00040 | chr2 | 5389560 | 5389838 | BayoCanha_2008_Vr_1 | BayoCanha_2009_Vr_2 | BayoCanha_2010_Vr-Rp_1 | Costantini_2003_V-R_1 | ||||||||||
| VIT_00s0229g00050 | chr2 | 5391527 | 5392605 | BayoCanha_2008_Vr_1 | BayoCanha_2009_Vr_2 | BayoCanha_2010_Vr-Rp_1 | Costantini_2003_V-R_1 | ||||||||||
| VIT_00s0229g00060 | chr2 | 5395355 | 5398517 | BayoCanha_2008_Vr_1 | BayoCanha_2009_Vr_2 | BayoCanha_2010_Vr-Rp_1 | Costantini_2003_V-R_1 |
Now I load the original AFI file as received from Sara
options(knitr.kable.NA = '')
## merge qtls and metaqtls (this command I cannot do it on the laptop, too memory consuming)
# all_qtls_metaqtls_V1_on_12x.2 <- merge(all_qtl_merge, all_mqtl_merge, all=T)
## I load the file elaborated on the server
# write.table(all_qtls_metaqtls_V1_on_12x.2, file="../all_qtls_metaqtls_V1_on_12x.2.txt", row.names=F, quote=F, sep="\t", dec=".")
all_qtls_metaqtls_V1_on_12x.2 <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/all_qtls_metaqtls_V1_on_12x.2.txt", header=T)
afi_original <- read_excel("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/OLD-FILES/nuovi_database/Annotation_Families_Integrated_2016.xlsx", sheet= "Annotation_Families_Integrated")
kable(head(afi_original), align="l")
| Nimblegen Probe_ID | Unique ID | Agilent Probe_ID | Corvina_Annotation/Others | old 12Xv1 name | 12Xv0 ID | Identical genes in 8X or other EST | Probeset grapegen | Chromosome position 12X | Cardinality between 12Xv0 and v1 | Cardinality between 8X and 12Xv1 | Track 12X v1 | Functional annotation | Reference | Network | Gene Ontology | Function - R |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CHR3_JGVV88_67_T01 | VIT_03s0088g00510 | CUST_9884_PI428914343 | JGVv88.67.t01 | GSVIVT01037034001 | GSVIVP00038603001 | chr03_08696026_08697570 | OK | V1 | hypothetical MADS-box type 1 alpha 1b (VviMADS1A1b) | Grimplet, J. 2016 | vv60042MADS | Transcription Factor Activity | hypothetical MADS-box type 1 alpha 1b (VviMADS1A1b)_VIT_03s0088g00510 | |||
| CHR1_JGVV10_103_T01 | VIT_01s0010g01500 | CUST_26841_PI428914343 | JGVv10.103.t01 | GSVIVT01010218001 | GSVIVP00021252001 | chr01_17760880_17762318 | OK | V1 | hypothetical MADS-box type 1 alpha 1d (VviMADS1A1d) | Grimplet, J. 2016 | vv60042MADS | Transcription Factor Activity | hypothetical MADS-box type 1 alpha 1d (VviMADS1A1d)_VIT_01s0010g01500 | |||
| CHR3_JGVV88_59_T01 | VIT_03s0088g00610 | CUST_9891_PI428914343 | JGVv88.59.t01 | GSVIVT01037022001 | GSVIVP00038593001 | chr03_08820035_08820694 | OK | V1 | hypothetical MADS-box type 1 alpha 1f (VviMADS1A1f) | Grimplet, J. 2016 | vv60042MADS | Transcription Factor Activity | hypothetical MADS-box type 1 alpha 1f (VviMADS1A1f)_VIT_03s0088g00610 | |||
| CHR3_JGVV88_61_T01 | VIT_03s0088g00590 | CUST_9889_PI428914343 | JGVv88.61.t01 | GSVIVT01037026001 | GSVIVP00038595001 | chr03_08776002_08776646 | OK | V1 | hypothetical MADS-box type 1 alpha 1g (VviMADS1A1g) | Grimplet, J. 2016 | vv60042MADS | Transcription Factor Activity | hypothetical MADS-box type 1 alpha 1g (VviMADS1A1g)_VIT_03s0088g00590 | |||
| CHR12_JGVV142_18_T01 | VIT_12s0142g00360 | CUST_22066_PI428914343 | JGVv142.18.t01 | GSVIVT01000802001 | GSVIVP00018932001 GSVIVP00018930001 | VVTU5956_at | chr12_00279426_00291956 | merge2 | V1 | putative MADS-box Agamous 1 (VviAG1) | Grimplet, J. 2016 | vv60042MADS | Transcription Factor Activity | putative MADS-box Agamous 1 (VviAG1)_VIT_12s0142g00360 | ||
| CHR10_JGVV3_187_T01 | VIT_10s0003g02070 | CUST_25054_PI428914343 | JGVv3.187.t01 | GSVIVT01021303001 | TC62522 DT011330 | chr10_03730392_03738386 | OK mrna | V1 | putative MADS-box Agamous 2 (VviAG2) | Grimplet, J. 2016 | vv30009Flower_development vv60042MADS | Transcription Factor Activity | putative MADS-box Agamous 2 (VviAG2)_VIT_10s0003g02070 |
## remove useless information
afi_original_reduced <- afi_original[,c(2,13)]
afi_original_reduced <- dplyr::filter(afi_original_reduced, grepl("VIT", `Unique ID`)) ## some filtering
## merge qtls and metaqtls with afi (this commands I cannot do it on the laptop, too memory consuming)
# all_qtls_metaqtls_V1_on_12x.2_with_annotation <- merge(all_qtls_metaqtls_V1_on_12x.2, afi_original_reduced, by.x = "gene", by.y="Unique ID", all=T)
# all_qtls_metaqtls_V1_on_12x.2_with_annotation <- merge(all_qtls_metaqtls_V1_on_12x.2, afi_original_reduced, all=T)
# kable(head(all_qtls_metaqtls_V1_on_12x.2_with_annotation), align="l")
# from here on I performed different merge to attach the new transcriptomic datasets to the afi file, until i get this big file with all the necessary information
new_afi_definitive <- read_excel("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/all_qtls_metaqtls_V1_on_12x.2_with_annotation.xlsx")
dim(new_afi_definitive)
## [1] 34009 97
The table is available at https://pietrod.shinyapps.io/Annotation_Families_Integrated/
I received 6 different datasets of Pinot Noir and Cabernet Sauvignon cultivars RNA-Seq FPKM data for 3 different years of berries samples around veraison time
I load the original dataset to perform filtering and cleaning. We need to identify the molecular veraison for the different years and cultivars in order to align the datasets and perform clustering
options(knitr.kable.NA = '')
pn.12 <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/PN12_FPKM_table.txt", header=T, row.names=1)
pn.13 <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/PN13_FPKM_table.tsv", header=T, row.names=1)
pn.14 <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/PN14_FPKM_table.tab", header=T, row.names=1)
cs.12 <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/CS12_FPKM_table.txt", header=T, row.names=1)
cs.13 <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/CS13_FPKM_table.tsv", header=T, row.names=1)
cs.14 <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/CS14_FPKM_table.tab", header=T, row.names=1)
dim(pn.12)
## [1] 29971 30
dim(pn.13)
## [1] 29971 33
dim(pn.14)
## [1] 29971 36
dim(cs.12)
## [1] 29971 39
dim(cs.13)
## [1] 29971 42
dim(cs.14)
## [1] 29971 39
kable(pn.12[1:6,1:18],align="l")
| PN_0_1_0 | PN_0_2_0 | PN_0_3_0 | PN_A_1_1 | PN_A_2_1 | PN_A_3_1 | PN_A_1_2 | PN_A_2_2 | PN_A_3_2 | PN_A_1_3 | PN_A_2_3 | PN_A_3_3 | PN_A_1_4 | PN_A_2_4 | PN_A_3_4 | PN_A_1_5 | PN_A_2_5 | PN_A_3_5 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VIT_12s0028g03890 | 6.982480 | 6.200700 | 6.599040 | 12.637400 | 6.984890 | 13.07730 | 8.741250 | 7.658590 | 8.0912300 | 14.24840 | 15.1014000 | 13.6873000 | 11.93210 | 11.91460 | 13.7933000 | 11.4142000 | 11.89900 | 13.9172000 |
| VIT_00s0301g00130 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.0000000 | 0.00000 | 0.0000000 | 0.0000000 | 0.00000 | 0.00000 | 0.0000000 | 0.0000000 | 0.00000 | 0.0000000 |
| VIT_15s0021g01700 | 0.976611 | 0.480536 | 0.936513 | 1.605550 | 1.082410 | 1.83607 | 2.363640 | 1.279090 | 1.7194200 | 1.15728 | 1.8783100 | 1.2871100 | 1.10023 | 1.53761 | 1.0152100 | 1.2928300 | 1.03396 | 1.0427500 |
| VIT_03s0132g00350 | 0.105368 | 0.147524 | 0.000000 | 0.103088 | 0.128263 | 0.00000 | 0.165335 | 0.133831 | 0.0519011 | 0.00000 | 0.0498966 | 0.0434941 | 0.00000 | 0.00000 | 0.0443722 | 0.0685977 | 0.00000 | 0.0438471 |
| VIT_17s0000g09240 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.0000000 | 0.00000 | 0.0000000 | 0.0000000 | 0.00000 | 0.00000 | 0.0000000 | 0.0000000 | 0.00000 | 0.0000000 |
| VIT_08s0058g00890 | 11.724200 | 12.667300 | 11.477000 | 27.740300 | 15.504900 | 26.53580 | 24.262500 | 24.121700 | 23.9950000 | 47.95110 | 43.5116000 | 36.4103000 | 42.22530 | 41.69190 | 42.3358000 | 33.3064000 | 30.71960 | 36.8685000 |
The idea is to calculate the number of genes, in particular the marker transitions early up, that show a value of log2FC greater than 1.5 in the intervals around veraison, in order to identify what time interval shows the highest number of mt genes moving and displaying significant increase in the expression level, to be able to say when is molecular veraison occurring
I wrote an R function that starting from the raw datasets of FPKM perform filtering and cleaning according to Sara suggestions and then calculate the fold change of the mt genes between each pair of time point from T0 to T5 and return the number of genes with a log2FC higher than 1.5
The function works like this:
molecular_veraison_windows <- function(filename) {
dat <- read.table(file = paste0("../../../nuovi_dataset/",filename), header=T, row.names=1)
clmn.n <- length(names(dat))
col.to.add <- (length(names(dat)) + 1) : ((length(names(dat)) + (length(names(dat))/3)))
dat[, col.to.add] <- (ifelse(sapply(seq(1,length(names(dat)),by=3),function(i) rowSums(dat[,i:(i+2)] < 1, na.rm=T) > 1),NA,"keep"))
dat.1 <- dat[rowSums(is.na(dat[,col.to.add]))!=ncol(dat[,col.to.add]), ]
dat.1 <- dat.1[,1:clmn.n]
colnames(dat.1) <- paste0(rep("t", length(names(dat.1))), rep(0:((length(names(dat.1))/3)-1),each=3))
# number of columns per group (1-3, 4-6)
n <- 3
# number of groups
n_grp <- ncol(dat.1) / n
# column indices (one vector per group)
idx_grp <- split(seq(dat.1), rep(seq(n_grp), each = n))
# calculate the row means for all groups
res <- lapply(idx_grp, function(i) {
# subset of the data frame
tmp <- dat.1[i]
# calculate row means
rowMeans(tmp, na.rm = TRUE)
})
# transform list into a data frame
dat2 <- as.data.frame(res)
# extract names of first column of each group
names_frst <- names(dat.1)[sapply(idx_grp, "[", 1)]
# modify column names of new data frame
names(dat2) <- names_frst
library(readxl)
m.t.all <- read_excel("../../../geni_marcatori_transizioni_copy.xlsx", sheet="all")
early_up <- m.t.all[m.t.all$marker_transitions == "early_up",]
dat2_early_up <- dat2[rownames(dat2) %in% early_up$ID,]
dat2_early_up <- as.data.frame(dat2_early_up)
dat2_early_up$FC0_1 <- dat2_early_up$t1 / dat2_early_up$t0
dat2_early_up$FC1_2 <- dat2_early_up$t2 / dat2_early_up$t1
dat2_early_up$FC2_3 <- dat2_early_up$t3 / dat2_early_up$t2
dat2_early_up$FC3_4 <- dat2_early_up$t4 / dat2_early_up$t3
dat2_early_up$FC4_5 <- dat2_early_up$t5 / dat2_early_up$t4
dat2_early_up_FC <- log2(dat2_early_up[,n_grp:(n_grp+5)])
print(dim(dat2_early_up_FC[dat2_early_up_FC$FC0_1 > 1.5 , ]))
print(dim(dat2_early_up_FC[dat2_early_up_FC$FC1_2 > 1.5 , ]))
print(dim(dat2_early_up_FC[dat2_early_up_FC$FC2_3 > 1.5 , ]))
print(dim(dat2_early_up_FC[dat2_early_up_FC$FC3_4 > 1.5 , ]))
print(dim(dat2_early_up_FC[dat2_early_up_FC$FC4_5 > 1.5 , ]))
}
Example of the function
# load the function
source("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/molecular_veraison_windows.r")
molecular_veraison_windows("PN12_FPKM_table.txt")
## [1] 27 6
## [1] 0 6
## [1] 127 6
## [1] 4 6
## [1] 0 6
In this example (Pinot 2012) the molecular veraison is considered between T2 and T3 - 127 genes (do not consider the second number, it is just the number of columns of the dataset returned as result). The table displayed before the numbers is a snapshot of the original dataset cleaned and where the values of the replicates have been averaged
Results for both cultivars and all years
options(knitr.kable.NA = '')
all.results <- read_excel("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/molecular_veraison-db.xlsx", sheet="all_for_R")
kable(all.results, align="l")
| cultivar | year | t0 | genes_number | t1 | genes_number__1 | t2 | genes_number__2 | t3 | genes_number__3 | t4 | genes_number__4 | t5 | results_mv |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PN | 2012 | -36 | 27 | -28 | 0 | -18 | 127 | -9 | 4 | 0 | 0 | t3 | |
| PN | 2013 | -20 | 11 | -14 | 52 | -7 | 26 | 0 | 0 | 0 | t2 | ||
| PN | 2014 | -20 | 6 | -14 | 38 | -6 | 58 | 0 | 0 | 0 | t3 | ||
| CS | 2012 | -37 | 60 | -28 | 20 | -18 | 56 | -9 | 70 | 0 | 0 | t4 | |
| CS | 2013 | -26 | 2 | -20 | 8 | -13 | 88 | -6 | 1 | 0 | 1 | t3 | |
| CS | 2014 | -23 | 8 | -14 | 18 | -7 | 80 | 0 | 8 | 0 | t3 |
To perform clustering I used the software called Clust, available at https://github.com/BaselAbujamous/clust . It is a pyhton program which takes as input the raw datasets and perform filtering, normalization, centering and clustering. It can handle n amount of datasets with different time points or conditions
We ran Clust both on single datasets keeping all the time points, and on multiple datasets together after sincronizing the time points around the molecular veraison identified previously (and then reducing the number of time points)
The clustering process was run not on the entire dataset but only on the genes in the QTLs intervals, that I extracted from the new AFI file
options(knitr.kable.NA = '')
pn.12.qtl.genes <- pn.12[rownames(pn.12) %in% all_qtl_merge$gene, ]
kable(pn.12.qtl.genes[1:6,1:18], align="l")
| PN_0_1_0 | PN_0_2_0 | PN_0_3_0 | PN_A_1_1 | PN_A_2_1 | PN_A_3_1 | PN_A_1_2 | PN_A_2_2 | PN_A_3_2 | PN_A_1_3 | PN_A_2_3 | PN_A_3_3 | PN_A_1_4 | PN_A_2_4 | PN_A_3_4 | PN_A_1_5 | PN_A_2_5 | PN_A_3_5 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VIT_03s0132g00350 | 0.105368 | 0.147524 | 0.00000 | 0.103088 | 0.128263 | 0.0000 | 0.165335 | 0.133831 | 0.0519011 | 0.0000 | 0.0498966 | 0.0434941 | 0.00000 | 0.0000 | 0.0443722 | 0.0685977 | 0.0000 | 0.0438471 |
| VIT_03s0088g00890 | 0.158621 | 0.000000 | 0.00000 | 0.153564 | 0.000000 | 0.0000 | 0.000000 | 0.000000 | 0.0000000 | 0.0000 | 0.0000000 | 0.0000000 | 0.00000 | 0.0000 | 0.0000000 | 0.0000000 | 0.0000 | 0.0000000 |
| VIT_02s0025g02400 | 7.086780 | 9.338490 | 6.76182 | 10.156600 | 8.812340 | 10.5243 | 9.226640 | 9.140880 | 9.2230700 | 11.8870 | 12.1128000 | 11.1657000 | 9.98114 | 12.1609 | 10.1839000 | 10.2910000 | 10.6120 | 10.4304000 |
| VIT_03s0110g00560 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.0000 | 0.000000 | 0.000000 | 0.0000000 | 0.0000 | 0.0000000 | 0.0000000 | 0.00000 | 0.0000 | 0.0000000 | 0.0000000 | 0.0000 | 0.0000000 |
| VIT_02s0154g00030 | 0.905504 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.0000 | 0.000000 | 0.000000 | 0.0000000 | 0.0000 | 0.0000000 | 0.0000000 | 0.00000 | 0.0000 | 0.0000000 | 0.0000000 | 0.0000 | 0.0000000 |
| VIT_02s0025g04120 | 9.655220 | 11.126800 | 7.81751 | 7.972000 | 10.320300 | 10.3643 | 10.103400 | 7.677600 | 7.3933500 | 12.1458 | 8.5257800 | 8.1271900 | 12.76890 | 16.8831 | 13.1693000 | 9.1092600 | 11.4073 | 9.5876300 |
dim(pn.12.qtl.genes)
## [1] 8091 30
The number of genes in all the QTLs intervals is 8091. The same subsetting has been applied to all 6 datasets
Clust was then ran with the same following commands. Before on single datasets without correction around molecular veraison
clust Data-pn_2012/ -n normalization-pn_2012.txt -r replicates_file-pn_2012.txt -fil-v 1.1 -fil-c 1 -fil-d 1 -np 18
And after correction when running on multiple datasets together, for example on the three years of Pinot together
clust Data-all-pn/ -n normalization-all-pn.txt -r replicates_file-all-pn.txt -fil-v 1.1 -fil-c 1 -fil-d 3 -np 18
Clustering results
The following table summarizes the results obtained for the clustering before and after centering around molecular veraison
options(knitr.kable.NA = '')
clust.results.before <- read_excel("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/clust_results_summary.xlsx", sheet="before")
kable(clust.results.before, align="l")
| cultivar | year | datasets | total genes | filtered | clusters | genes in clusters | smallest | largest | not included | folder |
|---|---|---|---|---|---|---|---|---|---|---|
| CS | 2012 | 1 | 8091 | 5294 | 11 | 1413 | 11 | 778 | 3881 | Results_28_Nov_17 |
| CS | 2013 | 1 | 8091 | 5253 | 14 | 1941 | 12 | 754 | 3312 | Results_28_Nov_17 |
| CS | 2014 | 1 | 8091 | 5136 | 10 | 1907 | 25 | 810 | 3229 | Results_28_Nov_17 |
| PN | 2012 | 1 | 8091 | 5223 | 15 | 2646 | 11 | 834 | 2577 | Results_28_Nov_17 |
| PN | 2013 | 1 | 8091 | 5184 | 12 | 2076 | 25 | 763 | 3108 | Results_28_Nov_17 |
| PN | 2014 | 1 | 8091 | 5091 | 13 | 2047 | 12 | 914 | 3044 | Results_28_Nov_17 |
| CS+PN | 2012 | 2 | 8091 | 4981 | 9 | 806 | 11 | 293 | 4175 | Results_29_Nov_17_2012 |
| CS+PN | 2013 | 2 | 8091 | 4947 | 7 | 893 | 12 | 424 | 4054 | Results_29_Nov_17_2013 |
| CS+PN | 2014 | 2 | 8091 | 4901 | 8 | 727 | 12 | 321 | 4174 | Results_29_Nov_17_2014 |
| CS | all | 3 | 8091 | 4976 | 5 | 640 | 15 | 415 | 4336 | Results_28_Nov_17_Data-all-cs |
| PN | all | 3 | 8091 | 4924 | 6 | 882 | 17 | 455 | 4042 | Results_28_Nov_17_Data-all-pn |
| CS+PN | all | 6 | 8091 | 4714 | 4 | 471 | 11 | 244 | 4243 | Results_28_Nov_17_Data-all |
clust.results.after <- read_excel("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/clust_results_summary.xlsx", sheet="after")
kable(clust.results.after, align="l")
| cultivar | year | datasets | total genes | filtered | clusters | genes in clusters | smallest | largest | not included | folder |
|---|---|---|---|---|---|---|---|---|---|---|
| CS | 2012 | 1 | 8091 | 5142 | 16 | 2110 | 12 | 875 | 3032 | cs12 |
| CS | 2013 | 1 | 8091 | 5118 | 14 | 2147 | 11 | 890 | 2971 | cs13 |
| CS | 2014 | 1 | 8091 | 5115 | 12 | 2127 | 17 | 885 | 2988 | cs14 |
| PN | 2012 | 1 | 8091 | 5223 | 15 | 2646 | 11 | 834 | 2577 | pn12 |
| PN | 2013 | 1 | 8091 | 5184 | 12 | 2076 | 25 | 763 | 3108 | pn13 |
| PN | 2014 | 1 | 8091 | 5091 | 13 | 2047 | 12 | 914 | 3044 | pn14 |
| CS+PN | 2012 | 2 | 8091 | 4922 | 6 | 528 | 15 | 322 | 4394 | Results_01_Dec_17_2012 |
| CS+PN | 2013 | 2 | 8091 | 4880 | 8 | 895 | 12 | 425 | 3985 | Results_01_Dec_17_2013 |
| CS+PN | 2014 | 2 | 8091 | 4893 | 6 | 894 | 11 | 363 | 3999 | Results_01_Dec_17_2014 |
| CS | all | 3 | 8091 | 4808 | 5 | 967 | 11 | 442 | 3841 | Results_06_Dec_17_cs |
| PN | all | 3 | 8091 | 4865 | 9 | 1206 | 12 | 565 | 3659 | Results_06_Dec_17_pn |
| CS+PN | all | 6 | 8091 | 4610 | 4 | 547 | 11 | 268 | 4063 | Results_06_Dec_17_all |
Some example plots from the clustering results of Clust
Before I load the genes (VIT) placed in the clusters for each separate datasets
options(knitr.kable.NA = '')
setwd("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/Results/Before-correction/Results_28_Nov_17/")
filenames <- list.files(path = ".",pattern=("ts.tsv"), recursive=T)
all_clust <- lapply(filenames, read.table, head=T, sep="\t", na.strings="")
names(all_clust) <- filenames
names(all_clust) <- gsub(pattern="Results_28_Nov_17_", replacement="", x=names(all_clust))
names(all_clust) <- gsub(pattern="/Clusters_Objects.tsv", replacement="", x=names(all_clust))
## rimuovo la seconda riga (quella con scritto Genes) da tutti i df ##
all_clust <- lapply(all_clust, function(x) x[2:nrow(x),])
dim(all_clust)
## NULL
Then I load all the data processed by Clust that the program used to plot the results (cleaned, filtered, centered, normalized)
To correct
options(knitr.kable.NA = '')
setwd("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/Results/Before-correction/Results_28_Nov_17/")
cl.pn12 <- read.table("Results_28_Nov_17_PN-2012/Clusters_Objects.tsv", head=T, sep="\t", na.strings="")
cl.pn12 <- cl.pn12[2:nrow(cl.pn12),]
dim(cl.pn12)
## [1] 834 15
cl.pn12.up <- cl.pn12[,c(3,4)]
head(cl.pn12.up)
## C2..21.genes. C3..134.genes.
## 2 VIT_01s0011g00710 VIT_01s0011g01520
## 3 VIT_02s0025g03220 VIT_01s0011g01780
## 4 VIT_02s0025g04580 VIT_01s0011g04240
## 5 VIT_02s0025g04850 VIT_01s0011g04620
## 6 VIT_02s0025g04860 VIT_01s0011g05000
## 7 VIT_03s0091g00260 VIT_01s0011g05030
cl.pn13 <- read.table("Results_28_Nov_17_PN-2013/Clusters_Objects.tsv", head=T, sep="\t", na.strings="")
cl.pn13 <- cl.pn13[2:nrow(cl.pn13),]
dim(cl.pn13)
## [1] 763 12
cl.pn13.up <- cl.pn13[,c(6:9)]
head(cl.pn13.up)
## C5..25.genes. C6..54.genes. C7..35.genes. C8..73.genes.
## 2 VIT_01s0011g01070 VIT_01s0011g00620 VIT_00s0229g00160 VIT_01s0011g01300
## 3 VIT_01s0011g02310 VIT_01s0011g00710 VIT_01s0011g03910 VIT_01s0011g02730
## 4 VIT_02s0012g02670 VIT_01s0011g03220 VIT_01s0011g05180 VIT_01s0011g02990
## 5 VIT_02s0025g01010 VIT_01s0011g04030 VIT_01s0011g05240 VIT_01s0011g03610
## 6 VIT_02s0241g00090 VIT_01s0011g04250 VIT_01s0011g05250 VIT_01s0011g04280
## 7 VIT_03s0063g00580 VIT_01s0011g04640 VIT_01s0137g00240 VIT_01s0011g04790
cl.pn14 <- read.table("Results_28_Nov_17_PN-2014/Clusters_Objects.tsv", head=T, sep="\t", na.strings="")
cl.pn14 <- cl.pn14[2:nrow(cl.pn14),]
dim(cl.pn14)
## [1] 914 13
cl.pn14.up <- cl.pn14[,c(8:11)]
head(cl.pn14.up)
## C7..52.genes. C8..18.genes. C9..23.genes. C10..61.genes.
## 2 VIT_01s0011g01140 VIT_01s0011g03920 VIT_01s0011g01520 VIT_00s0226g00090
## 3 VIT_01s0011g01510 VIT_01s0011g05590 VIT_01s0011g03480 VIT_00s0282g00030
## 4 VIT_01s0011g03910 VIT_02s0025g01430 VIT_01s0011g04490 VIT_01s0011g04350
## 5 VIT_01s0011g04240 VIT_02s0025g02360 VIT_02s0012g00630 VIT_01s0011g05830
## 6 VIT_01s0011g06250 VIT_03s0063g00820 VIT_02s0025g02140 VIT_01s0137g00300
## 7 VIT_01s0137g00180 VIT_05s0102g01130 VIT_02s0025g02420 VIT_02s0012g00480
Which and how many up-regulated genes are in common between all the 3 years in Pinot clusters as obtained by Clust on each separate datasets?
options(knitr.kable.NA = '')
## I already know that these are 13 =)
thirteen <- sort(intersect(intersect(unique(as.vector(as.matrix(cl.pn12.up[,c(1:2)]))), unique(as.vector(as.matrix(cl.pn13.up[,c(1:4)])))), unique(as.vector(as.matrix(cl.pn14.up[,c(1:4)])))))
kable(thirteen, align="l")
| VIT_02s0025g01610 |
| VIT_03s0063g01870 |
| VIT_03s0091g00240 |
| VIT_12s0059g00670 |
| VIT_14s0006g01660 |
| VIT_16s0098g00790 |
| VIT_17s0000g05550 |
| VIT_18s0001g09500 |
| VIT_18s0001g09900 |
| VIT_18s0001g11260 |
| VIT_18s0001g12480 |
| VIT_18s0001g12510 |
| VIT_18s0001g13060 |
## Who are these genes?
kable(new_afi_definitive[new_afi_definitive$gene %in% thirteen , c(1,2,3,90:97)], align="l", row.names=F)
| gene | chr | start | chr_position_12X | annotation_V1 | DEG_common | switch_common | NAC | DEG_atlas | switch_atlas | marker_transition |
|---|---|---|---|---|---|---|---|---|---|---|
| VIT_02s0025g01610 | 2 | 1372499 | chr02_01560083_01563233 | Phosphatidylinositol 4-kinase type-II | marker_transition | |||||
| VIT_03s0063g01870 | 3 | 4236634 | chr03_05200808_05203881 | Cold acclimation protein WCOR413 protein beta form | marker_transition | |||||
| VIT_03s0091g00240 | 3 | 6521904 | chr03_06521904_06537905 | Haloacid dehalogenase hydrolase | ||||||
| VIT_12s0059g00670 | 12 | 5504434 | chr12_05504434_05508516 | NADP-dependent oxidoreductase | DEG_atlas | |||||
| VIT_14s0006g01660 | 14 | 18041242 | chr14_18041242_18104242 | Pyridoxal kinase | ||||||
| VIT_16s0098g00790 | 16 | 22650462 | chr16_21130941_21133087 | Copper-binding family protein | DEG_common | marker_transition | ||||
| VIT_17s0000g05550 | 17 | 6183508 | chr17_06051880_06063502 | Proton-dependent oligopeptide transport (POT) family protein | DEG_common | DEG_atlas | ||||
| VIT_18s0001g09500 | 18 | 7942455 | chr18_07942455_07953002 | CYP81B2v1 | ||||||
| VIT_18s0001g09900 | 18 | 8275291 | chr18_08275291_08275974 | No hit | ||||||
| VIT_18s0001g11260 | 18 | 9571971 | chr18_09571971_09578214 | Nucleobase-ascorbate transporter 3 (NAT3) | ||||||
| VIT_18s0001g12480 | 18 | 10670045 | chr18_10670045_10675852 | Metal transporter CNNM4 (Cyclin-M4) | ||||||
| VIT_18s0001g12510 | 18 | 10686816 | chr18_10686816_10697439 | 5’ nucleotidase | marker_transition | |||||
| VIT_18s0001g13060 | 18 | 11153980 | chr18_11153980_11161250 | C3H2C3 ring-finger protein |
Now I retrieve the up-regulated genes as obtained by Clust on the 3 years datasets integrated analysis
setwd("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/Results/After-correction/Results_06_Dec_17/Results_06_Dec_17_pn/")
pn.up.together <- read.table("Clusters_Objects.tsv", head=T, sep="\t", na.strings="")
pn.up.together <- pn.up.together[2:nrow(pn.up.together),]
dim(pn.up.together)
## [1] 565 9
head(pn.up.together)
## C0..451.genes. C1..29.genes. C2..23.genes. C3..45.genes.
## 2 VIT_00s0226g00040 VIT_01s0011g02040 VIT_00s0229g00030 VIT_00s1338g00020
## 3 VIT_00s0229g00060 VIT_01s0011g05730 VIT_01s0011g02810 VIT_01s0011g01290
## 4 VIT_00s0555g00050 VIT_02s0012g01730 VIT_02s0025g03560 VIT_01s0011g02310
## 5 VIT_01s0011g00580 VIT_02s0025g02040 VIT_02s0033g01310 VIT_01s0011g04050
## 6 VIT_01s0011g00650 VIT_02s0025g02760 VIT_02s0087g00130 VIT_01s0011g05850
## 7 VIT_01s0011g00700 VIT_03s0091g01230 VIT_03s0038g02340 VIT_01s0011g06030
## C4..22.genes. C5..17.genes. C6..42.genes. C7..565.genes.
## 2 VIT_01s0011g02320 VIT_01s0011g00710 VIT_00s0229g00140 VIT_00s0229g00100
## 3 VIT_01s0137g00670 VIT_01s0011g01520 VIT_00s0229g00160 VIT_00s0229g00180
## 4 VIT_02s0025g04250 VIT_01s0011g04250 VIT_01s0011g00550 VIT_00s0965g00010
## 5 VIT_02s0025g04260 VIT_01s0011g04640 VIT_01s0011g00850 VIT_01s0011g00590
## 6 VIT_03s0063g00820 VIT_01s0011g06250 VIT_01s0011g01640 VIT_01s0011g00610
## 7 VIT_03s0088g00710 VIT_02s0025g00470 VIT_01s0011g03610 VIT_01s0011g00660
## C8..12.genes.
## 2 VIT_02s0025g03100
## 3 VIT_05s0094g01280
## 4 VIT_07s0104g00130
## 5 VIT_11s0016g05100
## 6 VIT_12s0059g02270
## 7 VIT_13s0084g00810
Among these clusters the profiles we are interested in are the ones from clusters C3 and C4
options(knitr.kable.NA = '')
## I already know that these are 62 =)
sixtytwo <- sort(unique(as.vector(as.matrix(pn.up.together[,c(4,6)]))))
kable(sixtytwo)
| VIT_00s1338g00020 |
| VIT_01s0011g00710 |
| VIT_01s0011g01290 |
| VIT_01s0011g01520 |
| VIT_01s0011g02310 |
| VIT_01s0011g04050 |
| VIT_01s0011g04250 |
| VIT_01s0011g04640 |
| VIT_01s0011g05850 |
| VIT_01s0011g06030 |
| VIT_01s0011g06250 |
| VIT_02s0012g00760 |
| VIT_02s0012g01100 |
| VIT_02s0012g01630 |
| VIT_02s0025g00470 |
| VIT_02s0025g01400 |
| VIT_02s0025g03310 |
| VIT_02s0025g03510 |
| VIT_02s0025g04340 |
| VIT_02s0033g01330 |
| VIT_02s0109g00230 |
| VIT_03s0038g02190 |
| VIT_03s0063g00360 |
| VIT_03s0063g01870 |
| VIT_03s0063g02410 |
| VIT_03s0063g02510 |
| VIT_05s0029g00140 |
| VIT_05s0094g00350 |
| VIT_05s0094g01110 |
| VIT_05s0094g01210 |
| VIT_05s0102g00440 |
| VIT_05s0102g01190 |
| VIT_05s0124g00270 |
| VIT_06s0009g02640 |
| VIT_06s0009g03190 |
| VIT_09s0070g00490 |
| VIT_11s0016g03200 |
| VIT_11s0016g03720 |
| VIT_11s0118g00230 |
| VIT_11s0118g00240 |
| VIT_12s0059g00810 |
| VIT_12s0059g01080 |
| VIT_13s0084g00050 |
| VIT_14s0068g00640 |
| VIT_16s0098g00790 |
| VIT_17s0000g04890 |
| VIT_17s0000g05580 |
| VIT_17s0000g05960 |
| VIT_17s0000g06300 |
| VIT_17s0000g07490 |
| VIT_17s0000g07610 |
| VIT_17s0000g07920 |
| VIT_18s0001g00890 |
| VIT_18s0001g02250 |
| VIT_18s0001g02970 |
| VIT_18s0001g03950 |
| VIT_18s0001g08490 |
| VIT_18s0001g11930 |
| VIT_18s0001g13950 |
| VIT_18s0041g00740 |
| VIT_18s0075g00510 |
| VIT_18s0122g00410 |
## Who are these genes?
kable(new_afi_definitive[new_afi_definitive$gene %in% sixtytwo , c(1,2,3,90:97)], align="l")
| gene | chr | start | chr_position_12X | annotation_V1 | DEG_common | switch_common | NAC | DEG_atlas | switch_atlas | marker_transition |
|---|---|---|---|---|---|---|---|---|---|---|
| VIT_01s0011g00710 | 1 | 616107 | chr01_00616107_00618157 | Zinc finger (C2H2 type) family | ||||||
| VIT_01s0011g01290 | 1 | 1098416 | chr01_01098416_01102667 | N-carbamyl-L-amino acid amidohydrolase | ||||||
| VIT_01s0011g01520 | 1 | 1336774 | chr01_01336774_01340483 | Peroxisomal 2,4-dienoyl-CoA reductase | ||||||
| VIT_01s0011g02310 | 1 | 2088704 | chr01_02088704_02090176 | Unknown protein | marker_transition | |||||
| VIT_01s0011g04050 | 1 | 3698303 | chr01_03698303_03708992 | Nucleotidyltransferase | ||||||
| VIT_01s0011g04250 | 1 | 3854482 | chr01_03854482_03857576 | Zinc finger (B-box type) | ||||||
| VIT_01s0011g04640 | 1 | 4174779 | chr01_04174779_04176173 | Auxin Efflux Carrier | ||||||
| VIT_01s0011g05850 | 1 | 5609439 | chr01_05609439_05614892 | SEC14 cytosolic factor | ||||||
| VIT_01s0011g06030 | 1 | 5786224 | chr01_05786224_05797572 | Myrosinase precursor | ||||||
| VIT_01s0011g06250 | 1 | 6043632 | chr01_06043632_06047154 | Xyloglucan endotransglycosylase | ||||||
| VIT_02s0025g00470 | 2 | 369467 | chr02_00557051_00570448 | EMB3001 (embryo defective 3001) | marker_transition | |||||
| VIT_02s0025g01400 | 2 | 1153133 | chr02_01340717_01348463 | Leucine-rich repeat family protein stress | ||||||
| VIT_02s0025g03310 | 2 | 2633914 | chr02_02821498_02823906 | Arsenite transport protein (ArsB) | DEG_common | |||||
| VIT_02s0025g03510 | 2 | 2815103 | chr02_03002687_03014242 | Bromo-adjacenty (BAH) domain-containing protein | ||||||
| VIT_02s0025g04340 | 2 | 3637347 | chr02_03824931_03825899 | Osmotin | DEG_common | switch_common | DEG_atlas | switch_atlas | ||
| VIT_00s1338g00020 | 2 | 5343106 | chrUn_38606407_38609245 | Protein transport protein Sec61 subunit alpha | ||||||
| VIT_02s0012g00760 | 2 | 6778127 | chr02_06666128_06670079 | Haloacid dehalogenase hydrolase | marker_transition | |||||
| VIT_02s0012g01100 | 2 | 7175167 | chr02_07063168_07078721 | Protein phosphatase 2C POLTERGEIST-like 1 | ||||||
| VIT_02s0012g01630 | 2 | 7998696 | chr02_07886697_07889877 | Transmembrane protein 41B | marker_transition | |||||
| VIT_02s0109g00230 | 2 | 12786746 | chr02_12674747_12696922 | Early-responsive to dehydration protein / ERD protein | marker_transition | |||||
| VIT_02s0033g01330 | 2 | 16897866 | chr02_16785867_16789390 | Acyl-CoA binding protein | DEG_common | switch_common | marker_transition | |||
| VIT_03s0038g02190 | 3 | 1502133 | chr03_01502133_01506299 | Nodulin | ||||||
| VIT_03s0063g02510 | 3 | 3696360 | chr03_05738865_05744155 | Protein kinase | ||||||
| VIT_03s0063g02410 | 3 | 3783389 | chr03_05641144_05657126 | Switching protein 3D ATSWI3D | ||||||
| VIT_03s0063g01870 | 3 | 4236634 | chr03_05200808_05203881 | Cold acclimation protein WCOR413 protein beta form | marker_transition | |||||
| VIT_03s0063g00360 | 3 | 5511388 | chr03_03918238_03929127 | Glycoprotein 3-alpha-L-fucosyltransferase | marker_transition | |||||
| VIT_05s0029g00140 | 5 | 14717848 | chr05_14296111_14297784 | ERF/AP2 Gene Family (VvERF005),Dehydration Responsive Element-Binding Transcription Factor (VvDREB05) | DEG_common | DEG_atlas | ||||
| VIT_05s0124g00270 | 5 | 21236964 | chr05_20815227_20843551 | Unknown protein | ||||||
| VIT_05s0102g00440 | 5 | 22815958 | chr05_22394221_22398999 | FAR1-related sequence 5 | ||||||
| VIT_05s0102g01190 | 5 | 23751980 | chr05_23330243_23338137 | Calreticulin-3 precursor | marker_transition | |||||
| VIT_05s0094g00350 | 5 | 24291499 | chr05_23662399_23675612 | Chitinase class IV | DEG_common | |||||
| VIT_05s0094g01110 | 5 | 25073598 | chr05_24444498_24453092 | Acetolactate synthase small subunit | ||||||
| VIT_05s0094g01210 | 5 | 25136361 | chr05_24507261_24509135 | Amine oxidase | ||||||
| VIT_06s0009g02640 | 6 | 16547312 | chr06_15422302_15431585 | Hydroxymethylglutaryl-CoA lyase | ||||||
| VIT_06s0009g03190 | 6 | 17503185 | chr06_16378175_16390845 | Unknown protein | ||||||
| VIT_09s0070g00490 | 9 | 13760643 | chr09_13760643_13784740 | Protein phosphatase 2C | ||||||
| VIT_11s0016g03200 | 11 | 2568013 | chr11_02568013_02573551 | Gibberellin oxidase | marker_transition | |||||
| VIT_11s0016g03720 | 11 | 3049799 | chr11_03049799_03055105 | Aspartate aminotransferase, cytoplasmic (Transaminase A) | marker_transition | |||||
| VIT_11s0118g00230 | 11 | 5860792 | chr11_05843896_05869373 | ATSYTE/NTMC2T2.1/NTMC2TYPE2.1/SYTE | ||||||
| VIT_11s0118g00240 | 11 | 5886913 | chr11_05870017_05909727 | Purple acid phosphatase 26 | marker_transition | |||||
| VIT_12s0059g00810 | 12 | 5609780 | chr12_05609780_05612228 | B-cell receptor-associated protein 31 | ||||||
| VIT_12s0059g01080 | 12 | 5990364 | chr12_05990364_05993590 | Acid phosphatase/vanadium-dependent haloperoxidase | marker_transition | |||||
| VIT_13s0084g00050 | 13 | 21846264 | chr13_18556390_18561669 | Cysteine proteinase inhibitor | marker_transition | |||||
| VIT_14s0068g00640 | 14 | 24438706 | chr14_24438706_24450994 | Acetyl-CoA synthetase | marker_transition | |||||
| VIT_16s0098g00790 | 16 | 22650462 | chr16_21130941_21133087 | Copper-binding family protein | DEG_common | marker_transition | ||||
| VIT_17s0000g04890 | 17 | 5409996 | chr17_05278368_05292271 | D-aminoacyl-tRNA deacylase GEKO1 | ||||||
| VIT_17s0000g05580 | 17 | 6213229 | chr17_06081601_06089504 | Isopiperitenol dehydrogenase | DEG_common | DEG_atlas | marker_transition | |||
| VIT_17s0000g05960 | 17 | 6630398 | chr17_06498770_06499583 | Pectinesterase family | ||||||
| VIT_17s0000g06300 | 17 | 6963086 | chr17_06831458_06832686 | No hit | marker_transition | |||||
| VIT_17s0000g07490 | 17 | 8555284 | chr17_08423656_08438725 | Unknown protein | ||||||
| VIT_17s0000g07610 | 17 | 8721441 | chr17_08589813_08592495 | B12D protein | ||||||
| VIT_17s0000g07920 | 17 | 9030527 | chr17_08898899_08901368 | Hypoxia-responsive | ||||||
| VIT_18s0122g00410 | 18 | 368562 | chr18_00368562_00375676 | PPI1 (proton pump interactor 1 | ||||||
| VIT_18s0001g00890 | 18 | 1619182 | chr18_01619182_01620812 | Pentatricopeptide (PPR) repeat-containing | ||||||
| VIT_18s0001g02970 | 18 | 3057427 | chr18_03057427_03063862 | Glucose-6-phosphate/phosphate translocator related | ||||||
| VIT_18s0001g03950 | 18 | 3587990 | chr18_03587990_03596239 | Pm27 protein | marker_transition | |||||
| VIT_18s0001g08490 | 18 | 6935594 | chr18_06935594_06939798 | Early-responsive to dehydration | marker_transition | |||||
| VIT_18s0001g11930 | 18 | 10174236 | chr18_10174236_10175399 | Thaumatin SCUTL2 | DEG_common | switch_common | DEG_atlas | marker_transition | ||
| VIT_18s0001g13950 | 18 | 11940712 | chr18_11940712_11949407 | RNA polymerase Rpa43 subunit | ||||||
| VIT_18s0001g02250 | 18 | 20579102 | chr18_random_02618009_02632515 | Ras-related protein Rab-7A | ||||||
| VIT_18s0075g00510 | 18 | 24952214 | chr18_22007280_22010150 | Calpain alkylated DNA repair protein alkB homolog 6 | ||||||
| VIT_18s0041g00740 | 18 | 28678219 | chr18_25261198_25262924 | UDP-glucose: anthocyanidin 5,3-O-glucosyltransferase | DEG_common | DEG_atlas | marker_transition |
Now I try to plot these genes as a heatmap before with raw values and then with the values normalized and centered by Clust
Example with Pinot 2012. I use the function I wrote to clean RNAseq datasets to clean PN12 raw dataset
Raw FPKM
options(knitr.kable.NA = '')
source("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/clean_RNAseq_datasets_windows.r")
dim(pn.12)
## [1] 29971 30
pn.12.cleaned <- clean_RNAseq_datasets_windows("PN12_FPKM_table.txt")
dim(pn.12.cleaned)
## [1] 19437 10
pn.12.cleaned$ID <- rownames(pn.12.cleaned)
pn.12.cleaned.log <- log2(pn.12.cleaned[,1:10])
pn.12.cleaned.log$ID <- rownames(pn.12.cleaned.log)
## raw raw values
kable(head(pn.12.cleaned[rownames(pn.12.cleaned) %in% thirteen , -11 ]),align="l")
| t0 | t1 | t2 | t3 | t4 | t5 | t6 | t7 | t8 | t9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| VIT_03s0091g00240 | 7.385783 | 10.469067 | 13.379533 | 21.721367 | 22.487500 | 18.16630 | 14.483003 | 8.084137 | 7.617170 | 6.1112433 |
| VIT_12s0059g00670 | 1.930513 | 1.631103 | 1.411227 | 3.090993 | 4.838617 | 4.30138 | 3.271703 | 2.320103 | 1.556464 | 0.4801953 |
| VIT_18s0001g09900 | 9.452897 | 10.266453 | 7.660330 | 24.494367 | 29.082300 | 21.45623 | 14.472933 | 10.855890 | 6.323533 | 2.8722033 |
| VIT_14s0006g01660 | 16.221000 | 17.173633 | 17.008500 | 25.092033 | 22.168833 | 19.33003 | 15.489000 | 17.890900 | 12.078300 | 11.0676567 |
| VIT_18s0001g11260 | 85.197933 | 113.945667 | 109.638333 | 213.294667 | 255.716333 | 223.99800 | 195.114333 | 149.810000 | 136.956333 | 101.9110333 |
| VIT_17s0000g05550 | 8.238217 | 11.011500 | 13.010067 | 21.851267 | 22.387833 | 20.84993 | 16.842800 | 11.429910 | 9.461167 | 6.4447100 |
ggplot(melt(pn.12.cleaned[rownames(pn.12.cleaned) %in% thirteen , ], id.vars="ID"), aes(x=variable, y=value, group=ID)) + geom_line() + theme_bw() + ggtitle("pn12 thirteen raw FPKM")
heatmap.2(as.matrix(pn.12.cleaned[rownames(pn.12.cleaned) %in% thirteen , c(1:10) ]), Colv=F, trace="none",margins = c(8, 16))
kable(head(pn.12.cleaned[rownames(pn.12.cleaned) %in% sixtytwo, ]),align="l")
| t0 | t1 | t2 | t3 | t4 | t5 | t6 | t7 | t8 | t9 | ID | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| VIT_01s0011g00710 | 0.691529 | 1.68638 | 1.105790 | 2.980277 | 3.262777 | 2.466297 | 2.390553 | 2.003363 | 2.071133 | 1.561437 | VIT_01s0011g00710 |
| VIT_05s0102g00440 | 4.732187 | 6.34187 | 6.359550 | 6.177623 | 6.803440 | 6.351583 | 6.148123 | 6.223867 | 6.284473 | 6.447620 | VIT_05s0102g00440 |
| VIT_00s1338g00020 | 51.160933 | 70.88963 | 74.754633 | 111.880667 | 106.706000 | 113.289000 | 101.811033 | 95.869667 | 117.425000 | 99.276667 | VIT_00s1338g00020 |
| VIT_12s0059g01080 | 3.852520 | 4.36186 | 3.812033 | 6.606327 | 9.338753 | 10.905913 | 11.511100 | 15.190033 | 12.158700 | 11.441933 | VIT_12s0059g01080 |
| VIT_01s0011g06030 | 23.900000 | 24.05233 | 24.394900 | 27.139933 | 29.504100 | 31.305567 | 27.922933 | 27.137500 | 35.179167 | 34.854067 | VIT_01s0011g06030 |
| VIT_11s0016g03200 | 15.827533 | 25.68413 | 18.611067 | 50.675533 | 71.516667 | 74.403067 | 66.719667 | 72.140933 | 85.163067 | 92.235500 | VIT_11s0016g03200 |
ggplot(melt(pn.12.cleaned[rownames(pn.12.cleaned) %in% sixtytwo, ], id.vars="ID"), aes(x=variable, y=value, group=ID)) + geom_line() + theme_bw() + ggtitle("pn12 sixtytwo raw FPKM")
heatmap.2(as.matrix(pn.12.cleaned[rownames(pn.12.cleaned) %in% sixtytwo , c(1:10) ]), Colv=F, trace="none",margins = c(8, 16), cexRow=0.4)
## log2 version
kable(head(pn.12.cleaned.log[rownames(pn.12.cleaned.log) %in% thirteen , ]),align="l")
| t0 | t1 | t2 | t3 | t4 | t5 | t6 | t7 | t8 | t9 | ID | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| VIT_03s0091g00240 | 2.8847509 | 3.3880609 | 3.7419559 | 4.441043 | 4.491051 | 4.183193 | 3.856289 | 3.015094 | 2.9292551 | 2.611466 | VIT_03s0091g00240 |
| VIT_12s0059g00670 | 0.9489845 | 0.7058482 | 0.4969497 | 1.628070 | 2.274595 | 2.104800 | 1.710042 | 1.214189 | 0.6382719 | -1.058307 | VIT_12s0059g00670 |
| VIT_18s0001g09900 | 3.2407565 | 3.3598660 | 2.9374065 | 4.614378 | 4.862069 | 4.423325 | 3.855285 | 3.440406 | 2.6607309 | 1.522158 | VIT_18s0001g09900 |
| VIT_14s0006g01660 | 4.0197909 | 4.1021234 | 4.0881840 | 4.649158 | 4.470461 | 4.272772 | 3.953172 | 4.161154 | 3.5943455 | 3.468278 | VIT_14s0006g01660 |
| VIT_18s0001g11260 | 6.4127465 | 6.8322022 | 6.7766085 | 7.736704 | 7.998400 | 7.807342 | 7.608176 | 7.226990 | 7.0975722 | 6.671166 | VIT_18s0001g11260 |
| VIT_17s0000g05550 | 3.0423321 | 3.4609391 | 3.7015564 | 4.449645 | 4.484643 | 4.381971 | 4.074060 | 3.514742 | 3.2420181 | 2.688115 | VIT_17s0000g05550 |
ggplot(melt(pn.12.cleaned.log[rownames(pn.12.cleaned.log) %in% thirteen , ], id.vars="ID"), aes(x=variable, y=value, group=ID)) + geom_line() + theme_bw() + ggtitle("pn12 thirteen log2 FPKM")
heatmap.2(as.matrix(pn.12.cleaned.log[rownames(pn.12.cleaned.log) %in% thirteen , c(1:10) ]), Colv=F, trace="none",margins = c(8, 16))
kable(head(pn.12.cleaned.log[rownames(pn.12.cleaned.log) %in% sixtytwo, ]),align="l")
| t0 | t1 | t2 | t3 | t4 | t5 | t6 | t7 | t8 | t9 | ID | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| VIT_01s0011g00710 | -0.5321383 | 0.7539297 | 0.1450774 | 1.575446 | 1.706100 | 1.302346 | 1.257345 | 1.002424 | 1.050420 | 0.6428741 | VIT_01s0011g00710 |
| VIT_05s0102g00440 | 2.2425070 | 2.6649083 | 2.6689247 | 2.627052 | 2.766264 | 2.667116 | 2.620146 | 2.637811 | 2.651792 | 2.6887667 | VIT_05s0102g00440 |
| VIT_00s1338g00020 | 5.6769707 | 6.1475028 | 6.2240911 | 6.805817 | 6.737497 | 6.823864 | 6.669750 | 6.583003 | 6.875596 | 6.6333828 | VIT_00s1338g00020 |
| VIT_12s0059g01080 | 1.9458024 | 2.1249435 | 1.9305607 | 2.723848 | 3.223230 | 3.447039 | 3.524954 | 3.925053 | 3.603917 | 3.5162589 | VIT_12s0059g01080 |
| VIT_01s0011g06030 | 4.5789387 | 4.5881050 | 4.6085077 | 4.762345 | 4.882843 | 4.968347 | 4.803379 | 4.762216 | 5.136649 | 5.1232551 | VIT_01s0011g06030 |
| VIT_11s0016g03200 | 3.9843645 | 4.6828055 | 4.2180888 | 5.663218 | 6.160208 | 6.217290 | 6.060040 | 6.172746 | 6.412156 | 6.5272502 | VIT_11s0016g03200 |
ggplot(melt(pn.12.cleaned.log[rownames(pn.12.cleaned.log) %in% sixtytwo, ], id.vars="ID"), aes(x=variable, y=value, group=ID)) + geom_line() + theme_bw() + ggtitle("pn12 sixtytwo log2 FPKM")
is.na(pn.12.cleaned.log) <- sapply(pn.12.cleaned.log, is.infinite)
heatmap.2(as.matrix(pn.12.cleaned.log[rownames(pn.12.cleaned.log) %in% sixtytwo, c(1:10) ]), Colv=F, trace="none",margins = c(8, 16), cexRow=0.4)
Data transformed by Clust
options(knitr.kable.NA = '')
## load data processed by Clust from single dataframe analysis
pn.12.processed.single <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/Results/Before-correction/Results_28_Nov_17/Results_28_Nov_17_PN-2012/Processed_Data/pn_2012.txt_processed.tsv", head=T, sep="\t", na.strings="")
dim(pn.12.processed.single)
## [1] 5223 11
kable(head(pn.12.processed.single[pn.12.processed.single$Genes %in% thirteen, ]),align="l")
| Genes | t0 | t1 | t2 | t3 | t4 | t5 | t6 | t7 | t8 | t9 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 816 | VIT_02s0025g01610 | -1.2685294 | -1.2146402 | -1.2621850 | 0.2547503 | 1.939204 | 1.1392424 | 0.4441945 | -0.0301788 | -0.0217308 | 0.0198727 |
| 1490 | VIT_03s0063g01870 | -1.4768386 | -0.6324725 | -1.4149163 | 0.8093506 | 1.139652 | 1.2577611 | 1.1086311 | 0.0990995 | -0.0944560 | -0.7958107 |
| 1625 | VIT_03s0091g00240 | -1.3130570 | -0.7394178 | -0.4419257 | 1.1067932 | 1.674054 | 0.9832702 | 0.9005157 | -0.4057954 | -0.8215623 | -0.9428748 |
| 2828 | VIT_12s0059g00670 | -0.6094980 | -0.8946531 | -1.0543887 | 0.1028155 | 1.464734 | 1.4937000 | 1.0919396 | 0.3773177 | -0.8275707 | -1.1443961 |
| 3074 | VIT_14s0006g01660 | -0.6352968 | -0.8455768 | -1.0250977 | 1.4035965 | 1.328436 | 0.9842226 | 0.4522699 | 0.5975094 | -1.2060074 | -1.0540558 |
| 3676 | VIT_16s0098g00790 | -1.2518436 | -1.1961095 | -1.2897724 | 0.1529066 | 1.608353 | 1.4302411 | 0.7463269 | 0.1646176 | -0.3094673 | -0.0552529 |
ggplot(melt(pn.12.processed.single[pn.12.processed.single$Genes %in% thirteen, ], id.vars="Genes"), aes(x=variable, y=value, group=Genes)) + geom_line() + theme_bw() + ggtitle("pn12 thirteen by clust single datasets")
rownames(pn.12.processed.single) <- pn.12.processed.single$Genes
heatmap.2(as.matrix(pn.12.processed.single[rownames(pn.12.processed.single) %in% thirteen, -1]), Colv=F, trace="none",margins = c(8, 16))
## load data processed by Clust from multiple dataframe analysis
pn.12.processed.together <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/Results/After-correction/Results_06_Dec_17/Results_06_Dec_17_pn/Processed_Data/pn_2012.txt_processed.tsv", head=T, sep="\t", na.strings="")
kable(head(pn.12.processed.together[pn.12.processed.together$Genes %in% sixtytwo, ]),align="l")
| Genes | t.2 | t.1 | t0 | t1 | t2 | t3 | t4 | t5 | t6 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 40 | VIT_00s1338g00020 | -1.7561878 | -1.699453 | -0.0412998 | 0.0947518 | 0.8082985 | 0.8215968 | 0.3620912 | 1.2357449 | 0.1744579 |
| 63 | VIT_01s0011g00710 | -1.3770076 | -1.975395 | -0.0003789 | 1.1926859 | 0.7237330 | 0.9062470 | 0.5469846 | 0.2511392 | -0.2680081 |
| 102 | VIT_01s0011g01290 | -1.2561277 | -1.731250 | -0.8276336 | 0.0723928 | 0.8912415 | 1.4644438 | 0.3075506 | 0.7832809 | 0.2961015 |
| 120 | VIT_01s0011g01520 | -0.8440816 | -2.189173 | 0.6724090 | 0.8727039 | 1.2559142 | 0.5960390 | 0.2692533 | -0.1218086 | -0.5112558 |
| 185 | VIT_01s0011g02310 | -1.5573986 | -1.825716 | -0.3402707 | 0.1635896 | 0.7572557 | 1.1911618 | 0.8209424 | 0.6402520 | 0.1501832 |
| 293 | VIT_01s0011g04050 | -1.0470033 | -1.105261 | -1.3640637 | -0.2150218 | 0.5371196 | 0.3529074 | -0.0424109 | 1.1236872 | 1.7600468 |
ggplot(melt(pn.12.processed.together[pn.12.processed.together$Genes %in% sixtytwo, ], id.vars="Genes"), aes(x=variable, y=value, group=Genes)) + geom_line() + theme_bw() + ggtitle("pn12 sixtytwo by clust all 3 years")
rownames(pn.12.processed.together) <- pn.12.processed.together$Genes
heatmap.2(as.matrix(pn.12.processed.together[rownames(pn.12.processed.together) %in% sixtytwo, -1 ]), Colv=F, trace="none",margins = c(8, 16), cexRow=0.4)
We decided to focus our analysis on the clusters produced by the analysis of all the 3 Pinot datasets together, that is the 62 up and 577 down regulated genes obtained by running Clust with all the 3 ds together
Now I retrieve the down-regulated genes as obtained by Clust on the 3 years datasets integrated analysis
## this file is the same as pn.up.together but I load it again anyway
setwd("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/Results/After-correction/Results_06_Dec_17/Results_06_Dec_17_pn/")
pn.down.together <- read.table("Clusters_Objects.tsv", head=T, sep="\t", na.strings="")
pn.down.together <- pn.down.together[2:nrow(pn.down.together),]
dim(pn.down.together)
## [1] 565 9
Among these clusters the profiles we are interested in are the ones from clusters C7 and C8
options(knitr.kable.NA = '')
## I already know that these are 577 =)
five77 <- sort(unique(as.vector(as.matrix(pn.down.together[,c(8,9)]))))
kable(head(five77),align="l")
| VIT_00s0229g00100 |
| VIT_00s0229g00180 |
| VIT_00s0965g00010 |
| VIT_01s0011g00590 |
| VIT_01s0011g00610 |
| VIT_01s0011g00660 |
## Who are these genes?
kable(head(new_afi_definitive[new_afi_definitive$gene %in% five77 , c(1,2,3,90:97)]), align="l")
| gene | chr | start | chr_position_12X | annotation_V1 | DEG_common | switch_common | NAC | DEG_atlas | switch_atlas | marker_transition |
|---|---|---|---|---|---|---|---|---|---|---|
| VIT_01s0011g00590 | 1 | 543821 | chr01_00543821_00549831 | Acclimation of photosynthesis to environment | DEG_common | DEG_atlas | ||||
| VIT_01s0011g00610 | 1 | 558179 | chr01_00558179_00562815 | Unknown protein | DEG_common | |||||
| VIT_01s0011g00660 | 1 | 590563 | chr01_00590563_00592859 | Pentatricopeptide (PPR) repeat | ||||||
| VIT_01s0011g00730 | 1 | 649069 | chr01_00649069_00654392 | Alpha-1,4-glucan phosphorylase type H | DEG_atlas | |||||
| VIT_01s0011g00930 | 1 | 793778 | chr01_00793778_00796517 | FK506-binding protein genes family (VvFKBP13) | DEG_common | DEG_atlas | marker_transition | |||
| VIT_01s0011g01320 | 1 | 1121407 | chr01_01121407_01125878 | Cholinephosphate cytidylyltransferase |
Plot of data processed by Clust from multiple dataframe analysis
pn.12.processed.together <- read.table("X:/BIOINFORMATICS/Dropbox/PETER/METAQTL/QTLDB/analisi_mqtl/analisi_mqtl_070317/dataset_trascrittomica/nuovi_dataset/cluster_analysis-heatmap/clust/Results/After-correction/Results_06_Dec_17/Results_06_Dec_17_pn/Processed_Data/pn_2012.txt_processed.tsv", head=T, sep="\t", na.strings="")
ggplot(melt(pn.12.processed.together[pn.12.processed.together$Genes %in% five77, ], id.vars="Genes"), aes(x=variable, y=value, group=Genes)) + geom_line() + theme_bw() + ggtitle("pn12 five77 by clust all 3 years")
rownames(pn.12.processed.together) <- pn.12.processed.together$Genes
heatmap.2(as.matrix(pn.12.processed.together[rownames(pn.12.processed.together) %in% five77, -1 ]), Colv=F, trace="none",margins = c(8, 16), cexRow=0.3)
Gene Ontology from BiomaRt
options(knitr.kable.NA = '')
plant<-useMart("plants_mart",dataset="vvinifera_eg_gene", host="plants.ensembl.org")
attributes <- listAttributes(plant)
go_attributes <- attributes[27:33,1]
kable(go_attributes, align="l")
| go_id |
| name_1006 |
| definition_1006 |
| go_linkage_type |
| namespace_1003 |
| goslim_goa_accession |
| goslim_goa_description |
sixtytwo.go <- getBM(attributes=c('ensembl_gene_id','chromosome_name', go_attributes),filters ='ensembl_gene_id', values = sixtytwo, mart = plant)
## No encoding supplied: defaulting to UTF-8.
DT::datatable(sixtytwo.go, rownames = F, filter = "top")
ggplot(sixtytwo.go, aes(fct_infreq(go_id))) + geom_bar() + theme(axis.text.x=element_text(angle=90,hjust=1))
#ggplot(c1, aes(fct_infreq(goslim_goa_accession))) + geom_bar()
#c1[grepl("GO:0016020",c1$go_id),]